
#setwd("~/Documents/research/inProgress/philosophical/risk/")
#setwd("yourWorkingDirectory")


### Set working directory to "yourWorkingDirectory" -- wherever you have saved the data downloaded from the UK Data Service

###### Load data

## Loading the full .dta file into R using read.dta() does not work due to duplicated factor levels, for variables that are irrelevant to the subsequent analysis. Open the original data in Stata (13), and run the .do file ehsi_stataprep.do from this replication archive.

### Load required packages
library(readstata13)

###### Load the created dataset into R:

ehsi = read.dta13("hse2011_rsr.dta")
## this returns some warnings about missing factor labels, but the variables in question are numeric and will be treated as such subsequently

######################## Data cleaning and recoding of variables of interest:

## code missing subjective U risk responses as NA
ehsi$losejob = as.numeric(ehsi$Losejob)
ehsi$losejob[which(ehsi$Losejob < 0)] = NA
ehsi$losejob = ehsi$losejob/100

## code missing income responses as NA
ehsi$eqvinc[which(ehsi$eqvinc < 0)] = NA
## create income variable denominated in £000s
ehsi$eqinc.k = ehsi$eqvinc/1000

## code a numeric version of risk-lovingness
ehsi$riskseek = rep(NA, nrow(ehsi))
ehsi$riskseek[which(ehsi$LIKERISK == "Disagree strongly")] = 0
ehsi$riskseek[which(ehsi$LIKERISK == "Disagree")] = 1
ehsi$riskseek[which(ehsi$LIKERISK == "Disagree slightly")] = 2
ehsi$riskseek[which(ehsi$LIKERISK == "Neither agree nor disagree")] = 3
ehsi$riskseek[which(ehsi$LIKERISK == "Agree slightly")] = 4
ehsi$riskseek[which(ehsi$LIKERISK == "Agree")] = 5
ehsi$riskseek[which(ehsi$LIKERISK == "Agree strongly")] = 6

# GOODSELF
# numeric version of feeling good about self
ehsi$feelgood = rep(NA, nrow(ehsi))
ehsi$feelgood[which(ehsi$GOODSELF == "Disagree strongly")] = 0
ehsi$feelgood[which(ehsi$GOODSELF == "Disagree")] = 1
ehsi$feelgood[which(ehsi$GOODSELF == "Disagree slightly")] = 2
ehsi$feelgood[which(ehsi$GOODSELF == "Neither agree nor disagree")] = 3
ehsi$feelgood[which(ehsi$GOODSELF == "Agree slightly")] = 4
ehsi$feelgood[which(ehsi$GOODSELF == "Agree")] = 5
ehsi$feelgood[which(ehsi$GOODSELF == "Agree strongly")] = 6


ehsi$feelgood5 = rep(NA, nrow(ehsi))
ehsi$feelgood5[which(ehsi$Goodme == "None of the time")]=1
ehsi$feelgood5[which(ehsi$Goodme == "Rarely")]=2
ehsi$feelgood5[which(ehsi$Goodme == "Some of the time")]=3
ehsi$feelgood5[which(ehsi$Goodme == "Often")]=4
ehsi$feelgood5[which(ehsi$Goodme == "All of the time")]=5

ehsi$dealprb.n = rep(NA, nrow(ehsi))
ehsi$makemind.n = rep(NA, nrow(ehsi))
ehsi$useful.n = rep(NA, nrow(ehsi))

ehsi$dealprb.n[which(ehsi$Dealprb == "None of the time")]=1
ehsi$dealprb.n[which(ehsi$Dealprb == "Rarely")]=2
ehsi$dealprb.n[which(ehsi$Dealprb == "Some of the time")]=3
ehsi$dealprb.n[which(ehsi$Dealprb == "Often")]=4
ehsi$dealprb.n[which(ehsi$Dealprb == "All of the time")]=5

ehsi$makemind.n[which(ehsi$Makemind == "None of the time")]=1
ehsi$makemind.n[which(ehsi$Makemind == "Rarely")]=2
ehsi$makemind.n[which(ehsi$Makemind == "Some of the time")]=3
ehsi$makemind.n[which(ehsi$Makemind == "Often")]=4
ehsi$makemind.n[which(ehsi$Makemind == "All of the time")]=5

ehsi$useful.n[which(ehsi$Useful == "None of the time")]=1
ehsi$useful.n[which(ehsi$Useful == "Rarely")]=2
ehsi$useful.n[which(ehsi$Useful == "Some of the time")]=3
ehsi$useful.n[which(ehsi$Useful == "Often")]=4
ehsi$useful.n[which(ehsi$Useful == "All of the time")]=5

ehsi$confident.n = rep(NA, nrow(ehsi))
ehsi$confident.n[which(ehsi$Confidet == "None of the time")]=1
ehsi$confident.n[which(ehsi$Confidet == "Rarely")]=2
ehsi$confident.n[which(ehsi$Confidet == "Some of the time")]=3
ehsi$confident.n[which(ehsi$Confidet == "Often")]=4
ehsi$confident.n[which(ehsi$Confidet == "All of the time")]=5

## demographic variables for the individual respondent
# gender: $Sex
ehsi$female= as.numeric(ehsi$Sex == "Female")
# age (last bday): $Age -- already numeric
ehsi$age = ehsi$Age
# household size and composition: Adults, Children Infants (all numeric)
ehsi$hhs = ehsi$Adults + ehsi$Children + ehsi$Infants
# marital status: $marstat
ehsi$marital = as.numeric(ehsi$marstat == "Married" | ehsi$marstat == "Cohabitees" | ehsi$marstat == "Civil partnership including spontaneous answers")
ehsi$marital[which(ehsi$marstat == "Separated" | ehsi$marstat == "Divorced")] =2
ehsi$marital[which(ehsi$marstat == "Widowed")] =3
ehsi$marital[which(ehsi$marstat == "Refused" | ehsi$marstat == "Not applicable")] = NA
ehsi$marital = as.factor(ehsi$marital)
# here: single = 0, partnered = 1, sep/div = 2, widow= 3

# age finished education (grouped): $EducEnd
ehsi$educend.n = as.numeric(levels(ehsi$EducEnd))[ehsi$EducEnd]
ehsi$educend.n[which(ehsi$EducEnd == "Never went to school")] = 10
ehsi$educend.n[which(ehsi$EducEnd == "14 or under")] = 14
ehsi$educend.n[which(ehsi$EducEnd == "19 or over")] = 21

# highest educational qualification: $topqual2 (has category for full time students, separately)

##### respondent
# activity status in the last week $activb
ehsi$lookwk = as.numeric(ehsi$activb == "Looking for paid work or a Government training scheme" | ehsi$activb =="Intending to look for work but prevented by temporary sick")
ehsi$retired = as.numeric(ehsi$activb == "Retired from paid work")
ehsi$out_dis = as.numeric(ehsi$activb == "Permanently unable to work because of long-term sickness/disability")
ehsi$out_home = as.numeric(ehsi$activb == "Looking after home or family" |ehsi$activb == "Doing unpaid work for a business that you/relative owns")

# how long looking for paid employment $HowLong
# full time part time $FtPtime
ehsi$parttime = as.numeric(ehsi$FtPtime == "Part-time")
ehsi$parttime[which(ehsi$FtPtime == "Don't Know" |ehsi$FtPtime == "Item not applicable" | ehsi$FtPtime == "Refusal")] = NA

# self employment $Employe
ehsi$selfemp = as.numeric(ehsi$Employe == "or, self-employed?")
ehsi$selfemp[which(ehsi$Employe == "Item not applicable")] =NA

##### household reference person
# activity status in the last week (household reference person): $hrpactiv
ehsi$hrlookwk = as.numeric(ehsi$hrpactiv == "Looking for paid work or a Government training scheme" | ehsi$hrpactiv =="Intending to look for work but prevented by temporary sick")
ehsi$hrretired = as.numeric(ehsi$hrpactiv == "Retired from paid work")
ehsi$hrout_dis = as.numeric(ehsi$hrpactiv == "Permanently unable to work because of long-term sickness/disability")
ehsi$hrout_home = as.numeric(ehsi$hrpactiv == "Looking after home or family" |ehsi$hrpactiv == "Doing unpaid work for a business that you/relative owns")

# how long looking for paid employment $hrplong
# full time part time $hrpftpt
ehsi$hrparttime = as.numeric(ehsi$hrpftpt == "Part-time")
ehsi$hrparttime[which(ehsi$hrpftpt == "Don't Know" |ehsi$hrpftpt == "Item not applicable" | ehsi$hrpftpt == "Refusal")] = NA

# self employment $hrpemply
ehsi$hrselfemp = as.numeric(ehsi$hrpemply == "or, self-employed?")
ehsi$hrselfemp[which(ehsi$hrpemply == "Item not applicable")] =NA

## health risk: how likely to get seriously ill-- more or less lkely than other people of my age (a relative measure)
# ehsi$SERILL
ehsi$losehealth = rep(NA, nrow (ehsi))
ehsi$losehealth[which(ehsi$SERILL == "Much less likely")] = 1
ehsi$losehealth[which(ehsi$SERILL == "Little less likely")] = 2
ehsi$losehealth[which(ehsi$SERILL == "No more or less likely")] = 3
ehsi$losehealth[which(ehsi$SERILL == "Little more likely")] = 4
ehsi$losehealth[which(ehsi$SERILL == "Much more likely")] = 5

###### creating the dependent variable:
## measures of Rawlsian self-respect 
# two dimensions: worth/value of self and plans; confidence in ability to pursue plans

# binary self-respect measures
ehsi$sr.binary = rep(NA, nrow(ehsi))
ehsi$sr.binary = as.numeric(ehsi$confident.n > 2 & ehsi$useful.n > 2 & ehsi$feelgood5 > 2 & ehsi$dealprb.n > 2)
## 79.6% of people 'have' self respect, under this measure

ehsi$sr.binary2 = as.numeric(ehsi$confident.n > 3 & ehsi$useful.n > 3 & ehsi$feelgood5 > 3 & ehsi$dealprb.n > 3)
## 35.5% have SR on this measurement

### binary measure of worth only (the first "prong"):
ehsi$worth.binary2 = as.numeric( ehsi$useful.n > 3 & ehsi$feelgood5 > 3)

### binary measure excluding explicit "confidence" measure (the first "prong"):
ehsi$exconf.binary2 = as.numeric(ehsi$useful.n > 3 & ehsi$feelgood5 > 3 & ehsi$dealprb.n > 3)

## alternative numeric measures:
# worth
ehsi$worth = ehsi$feelgood5 + ehsi$useful.n
# confidence
ehsi$confidence = ehsi$confident + ehsi$dealprb.n
# numeric self respec measure
ehsi$sr = ehsi$worth + ehsi$confidence

## factor implementation of lose job: 
ehsi$losejob.f = as.factor(ehsi$losejob)

########## externally sourced objective risk measures:
uer = read.csv("uk_occ_gen_unemployment.csv")

## data downloaded from Office for National Statistics, series UNEM02, November 2013 release, accessed 13 Jan 2014. The ONS site has been updated but the same data are archived at http://webarchive.nationalarchives.gov.uk/20160114101732/http://www.ons.gov.uk/ons/rel/lms/labour-market-statistics/november-2013/table-unem02.xls

uer = uer[which(uer$date == "30/03/2011" | uer$date == "30/06/2011" |uer$date == "30/09/2011" |uer$date == "30/12/2011"),c(1:14, 33:42)]
## 1. create quarter indicator-- date-- for respondents' interview dates:
ehsi$qi = rep(NA, nrow(ehsi))
ehsi$qi[which(ehsi$MIntB < 4)] = "30/03/2011"
ehsi$qi[which(ehsi$MIntB > 3 & ehsi$MIntB < 7)] = "30/06/2011"
ehsi$qi[which(ehsi$MIntB > 6 & ehsi$MIntB < 10)] = "30/09/2011"
ehsi$qi[which(ehsi$MIntB > 9)] = "30/12/2011"

ehsi$isco1 = rep(NA, nrow(ehsi))
ehsi$isco1[which(ehsi$SOC2010B == "Corporate managers and directors")] = "managers_senior_officials"
ehsi$isco1[which(ehsi$SOC2010B == "Other managers and proprietors")] = "managers_senior_officials"
ehsi$isco1[which(ehsi$SOC2010B == "Science, research, engineering and technology professionals")] = "professionals"
ehsi$isco1[which(ehsi$SOC2010B == "Health professionals")] = "professionals"
ehsi$isco1[which(ehsi$SOC2010B == "Teaching and educational professionals")] = "professionals"
ehsi$isco1[which(ehsi$SOC2010B == "Business, media and public service professionals")] = "professionals"
ehsi$isco1[which(ehsi$SOC2010B == "Science, engineering and technology associate professionals")] = "assoc_prof_technical"
ehsi$isco1[which(ehsi$SOC2010B == "Health and social care associate professionals")] = "assoc_prof_technical"
ehsi$isco1[which(ehsi$SOC2010B == "Protective service occupations")] = "personal_services"
ehsi$isco1[which(ehsi$SOC2010B == "Culture, media and sports occupations")] = "assoc_prof_technical"
ehsi$isco1[which(ehsi$SOC2010B == "Business and public service associate professionals")] = "assoc_prof_technical"
 ehsi$isco1[which(ehsi$SOC2010B == "Administrative occupations")] = "admin_sec"
ehsi$isco1[which(ehsi$SOC2010B == "Secretarial and related occupations")] = "admin_sec"
ehsi$isco1[which(ehsi$SOC2010B == "Skilled agricultural and related trades")] = "skilled_trades"
ehsi$isco1[which(ehsi$SOC2010B == "Skilled metal, electrical and electronic trades")] = "skilled_trades"
ehsi$isco1[which(ehsi$SOC2010B == "Skilled construction and building trades")] = "skilled_trades"
ehsi$isco1[which(ehsi$SOC2010B == "Textiles, printing and other skilled trades")] = "skilled_trades" 
ehsi$isco1[which(ehsi$SOC2010B == "Caring personal service occupations")] = "personal_services"
ehsi$isco1[which(ehsi$SOC2010B == "Leisure, travel and related personal service occupations")] = "personal_services"
ehsi$isco1[which(ehsi$SOC2010B == "Sales occupations")] = "sales_cust_serv"
ehsi$isco1[which(ehsi$SOC2010B == "Customer service occupations")] = "sales_cust_serv"
ehsi$isco1[which(ehsi$SOC2010B == "Process, plant and machine operatives")] = "proc_plant_machine_op"
ehsi$isco1[which(ehsi$SOC2010B == "Transport and mobile machine drivers and operatives")] = "proc_plant_machine_op"        
ehsi$isco1[which(ehsi$SOC2010B == "Elementary trades and related occupations")] = "elem_occ"
ehsi$isco1[which(ehsi$SOC2010B == "Elementary administration and service occupations")] = "elem_occ"

occ_isco1 = unique(na.omit(ehsi$isco1))
uer$date = as.character(levels(uer$date))[uer$date]
ehsi$occ_gen_uer = rep(NA, nrow(ehsi))
for(i in 1:4){
	for(j in occ_isco1){
		ehsi$occ_gen_uer[which(ehsi$qi == uer$date[i] & ehsi$female == 1 & ehsi$isco1 == j)] = uer[[paste("uer", "w", j, sep = "_")]][i]
		ehsi$occ_gen_uer[which(ehsi$qi == uer$date[i] & ehsi$female == 0 & ehsi$isco1 == j)] = uer[[paste("uer", "m", j, sep = "_")]][i]
	}}
	
## Skill Specificity
## We use Iversen and Soskice's measure for the specificity of skills in different occupations, available at http://www.people.fas.harvard.edu/~iversen/data/Measuring_skill-specificity.xls

#### Download this excel spreadsheet 
### delete rows for 1-digit ISCO codes
### delete columns: (Relative skill specificity) / StDv ; s1 (roughly)
### create a single row for names and rename columns as follows:
## ISCO88 1- or 2-digit number	- ISCO88 2-digit	--> isco2
## Description of occupational group				--> occupation
## Number of unit groups within ISCO classification	--> N_within_class
## Share in ISCO classification						--> Share_in_class
## Empirical share in labor force - female			--> share_lf_f
## Empirical share in labor force - male			--> share_lf_m
## Empirical share in labor force - total			--> share_lf_t
## Absolute skill specificity	- total				--> Absolute skill specificty
## ISCO skill-level									--> ISCO skill-level	
## Relative skill specificity  	 					--> Relative skill specificity
### Save this spreadsheet as .csv in "yourWorkingDirectory"


## Please also see http://www.people.fas.harvard.edu/~iversen/SkillSpecificity.htm, http://www.people.fas.harvard.edu/~iversen/PDFfiles/CusackIversenRehm2005.pdf, and http://www.people.fas.harvard.edu/~iversen/PDFfiles/SocialPreferences.pdf for further details on the concept and measurement of skill specificity.

## load skill specificity data
sksp = read.csv("Measuring_skill-specificity.csv")

## match occupations in the HSE data to those in Iversen-Soskice's data
## soc2010b to isco 2, for skill specificity (as possible?)
ehsi$isco2 = rep(NA, nrow(ehsi))
ehsi$isco2[which(ehsi$SOC2010B == "Corporate managers and directors")] = 12
ehsi$isco2[which(ehsi$SOC2010B == "Other managers and proprietors")] = 13
ehsi$isco2[which(ehsi$SOC2010B == "Science, research, engineering and technology professionals")] = 21
ehsi$isco2[which(ehsi$SOC2010B == "Health professionals")] = 22
ehsi$isco2[which(ehsi$SOC2010B == "Teaching and educational professionals")] = 23
ehsi$isco2[which(ehsi$SOC2010B == "Business, media and public service professionals")] = 24
ehsi$isco2[which(ehsi$SOC2010B == "Science, engineering and technology associate professionals")] = 31
ehsi$isco2[which(ehsi$SOC2010B == "Health and social care associate professionals")] = 32
ehsi$isco2[which(ehsi$SOC2010B == "Protective service occupations")] = 51
ehsi$isco2[which(ehsi$SOC2010B == "Culture, media and sports occupations")] = 34
ehsi$isco2[which(ehsi$SOC2010B == "Business and public service associate professionals")] = 34
ehsi$isco2[which(ehsi$SOC2010B == "Administrative occupations")] = 41
ehsi$isco2[which(ehsi$SOC2010B == "Secretarial and related occupations")] = 41
ehsi$isco2[which(ehsi$SOC2010B == "Skilled agricultural and related trades")] = 61
ehsi$isco2[which(ehsi$SOC2010B == "Skilled metal, electrical and electronic trades")] = 72
ehsi$isco2[which(ehsi$SOC2010B == "Skilled construction and building trades")] = 71
ehsi$isco2[which(ehsi$SOC2010B == "Textiles, printing and other skilled trades")] = 73
ehsi$isco2[which(ehsi$SOC2010B == "Caring personal service occupations")] = 51
ehsi$isco2[which(ehsi$SOC2010B == "Leisure, travel and related personal service occupations")] = 51
ehsi$isco2[which(ehsi$SOC2010B == "Sales occupations")] = 52
ehsi$isco2[which(ehsi$SOC2010B == "Customer service occupations")] = 42
ehsi$isco2[which(ehsi$SOC2010B == "Process, plant and machine operatives")] = 81
ehsi$isco2[which(ehsi$SOC2010B == "Transport and mobile machine drivers and operatives")] =  83
ehsi$isco2[which(ehsi$SOC2010B == "Elementary trades and related occupations")] = 93
ehsi$isco2[which(ehsi$SOC2010B == "Elementary administration and service occupations")] = 91

## generate individual sksp measures, by isco 2d
ehsi = merge(ehsi, sksp[ , c(1, 8, 9, 10)], by = "isco2", all.x = T)
names(ehsi)[which(names(ehsi) == "Absolute.skill.specificity")] = "sksp.abs"
names(ehsi)[which(names(ehsi) == "Relative.skill.specificity")] = "sksp.rel"

## unemployment risk measures for the HRP, where respondent is not HRP:

ehsi$isco1hrp = rep(NA, nrow(ehsi))
ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Corporate managers and directors")] = "managers_senior_officials"
ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Other managers and proprietors")] = "managers_senior_officials"
ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Science, research, engineering and technology professionals")] = "professionals"
ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Health professionals")] = "professionals"
ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Teaching and educational professionals")] = "professionals"
ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Business, media and public service professionals")] = "professionals"
ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Science, engineering and technology associate professionals")] = "assoc_prof_technical"
ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Health and social care associate professionals")] = "assoc_prof_technical"
ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Protective service occupations")] = "personal_services"
ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Culture, media and sports occupations")] = "assoc_prof_technical"
ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Business and public service associate professionals")] = "assoc_prof_technical"
 ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Administrative occupations")] = "admin_sec"
ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Secretarial and related occupations")] = "admin_sec"
ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Skilled agricultural and related trades")] = "skilled_trades"
ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Skilled metal, electrical and electronic trades")] = "skilled_trades"
ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Skilled construction and building trades")] = "skilled_trades"
ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Textiles, printing and other skilled trades")] = "skilled_trades" 
ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Caring personal service occupations")] = "personal_services"
ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Leisure, travel and related personal service occupations")] = "personal_services"
ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Sales occupations")] = "sales_cust_serv"
ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Customer service occupations")] = "sales_cust_serv"
ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Process, plant and machine operatives")] = "proc_plant_machine_op"
ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Transport and mobile machine drivers and operatives")] = "proc_plant_machine_op"        
ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Elementary trades and related occupations")] = "elem_occ"
ehsi$isco1hrp[which(ehsi$HRPSOC10B == "Elementary administration and service occupations")] = "elem_occ"

occ_isco1hrp = unique(na.omit(ehsi$isco1hrp))
ehsi$occ_gen_uerhrp = rep(NA, nrow(ehsi))
for(i in 1:4){
	for(j in occ_isco1hrp){
		ehsi$occ_gen_uerhrp[which(ehsi$qi == uer$date[i] & ehsi$female == 1 & ehsi$isco1hrp == j)] = uer[[paste("uer", "w", j, sep = "_")]][i]
		ehsi$occ_gen_uerhrp[which(ehsi$qi == uer$date[i] & ehsi$female == 0 & ehsi$isco1hrp == j)] = uer[[paste("uer", "m", j, sep = "_")]][i]
	}}
	